Load data: 209 subjects with 864 goals are included in the following analysis.
# Load the preprocessed datasets exported by the cleaning pipeline
# (209 subjects, 864 goals). FALSE is spelled out instead of the
# reassignable shorthand F.
goalRating_long_R <- read.csv("./output/goalRating_long_R.csv", stringsAsFactors = FALSE)
indivDiffDf <- read.csv("./output/indivDiffDf.csv", stringsAsFactors = FALSE)
goalDf_sum_wide <- read.csv("./output/goalDf_wide.csv", stringsAsFactors = FALSE)
Check the amount of missing data per variable; below are the top 5 variables. Missing data is rare for all variables.
# Check the number of missing (NA) responses per variable.
# Each goal is rated on 35 variables, so nrow/35 gives the number of goals.
totalGoal <- nrow(goalRating_long_R)/35
# Tabulate NA ratings by variable, express them as a proportion of all goals,
# and show the 5 variables with the most missing data.
goalRating_long_R %>%
filter(is.na(rating)) %>%
tabyl(variable) %>%
mutate(percent = n/totalGoal) %>%
arrange(desc(percent)) %>%
head(5)
## variable n percent
## 1 frequency_R 6 0.006944444
## 2 attainability 5 0.005787037
## 3 attractiveness_progress 4 0.004629630
## 4 basic_needs 4 0.004629630
## 5 attainment_maintenance_R 3 0.003472222
The “construal_level”, “approach_avoidance” and “attainment_maintenance” questions have an option for “I’m not sure” because they ask subjects to categorize their goals.
Around 5% of the goals had “I’m not sure” as the response. The modification of the construal level question seems to have been successful!
# Check the number of "I'm not sure" responses per variable
# (99 is the numeric code for "I'm not sure").
goalRating_long_R %>%
filter(rating == 99) %>%
tabyl(variable) %>%
mutate(percent = n/totalGoal) %>%
arrange(desc(percent))
## variable n percent
## 1 construal_level 41 0.0474537
## 2 approach_avoidance_R 40 0.0462963
## 3 attainment_maintenance_R 40 0.0462963
Around 3.3% of participants selected the “I’m not sure” option for construal level more than once.
# Get the total number of subjects.
totalSub <- nrow(indivDiffDf)
# Percentage of participants who selected "I'm not sure" for construal level
# more than once. Note: %>% binds tighter than /, so nrow() of the filtered
# table is computed first and then divided by totalSub.
goalRating_long_R %>%
filter(rating == 99 & variable == "construal_level") %>%
tabyl(id) %>%
filter(n >1) %>%
nrow()/totalSub
## [1] 0.03349282
temporal_duration, frequency and end_state_specificity question have an option for “not specified” because they ask about features that may not be applicable to all goals.
The end state specificity is not applicable to around 17% of the goals
# Check the number of "not specified" responses per variable
# (999 is the numeric code for "not specified").
goalRating_long_R %>%
filter(rating == 999) %>%
tabyl(variable) %>%
mutate(percent = n/totalGoal) %>%
arrange(desc(percent))
## variable n percent
## 1 end_state_specificity_R 150 0.17361111
## 2 temporal_duration 62 0.07175926
## 3 frequency_R 61 0.07060185
All “I’m not sure” and “not specified” responses will be treated as missing data.
# Recode the special responses as missing data:
# 99 = "I'm not sure", 999 = "not specified".
# %in% is used instead of `rating == 99 | rating == 999`; it is the idiomatic
# set-membership test and never yields NA for NA inputs.
goalRating_long_R <- goalRating_long_R %>%
  mutate(rating = replace(rating, rating %in% c(99, 999), NA))
Descriptive on the number of goals subject claimed to have prior to listing them
# Descriptive statistics (psych::describe) for the number of goals each
# subject claimed to have before listing them.
describe(goalDf_sum_wide$total_goal)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 209 4.92 2.61 4 4.57 1.48 1 15 14 1.44 2.02 0.18
# Histogram of the number of claimed goals per participant.
# `<-` replaces the top-level `=` assignment per R convention.
breaks <- 1:15
goalDf_sum_wide %>%
  ggplot(aes(x = total_goal)) +
  scale_x_continuous(labels = scales::comma(breaks, accuracy = 1), breaks = breaks) +
  geom_histogram(fill = "orange",
                 colour = "black",
                 binwidth = 1) +
  labs(x="Number of claimed goals", y="# of participants") +
  theme_classic(base_size = 18)
The percentage of subjects who claimed having more than 5 goals: 24.4%
# Proportion of subjects who claimed having more than 5 goals.
# NOTE(review): subsetting with a logical containing NA would keep NA rows in
# the numerator -- presumably total_goal has no NAs; verify upstream.
length(goalDf_sum_wide$total_goal[goalDf_sum_wide$total_goal>5])/totalSub
## [1] 0.2440191
Descriptive on the number of goals participants actual listed
# Descriptive statistics for the number of goals participants actually listed
# (capped at 5 by the task).
describe(goalDf_sum_wide$listNum)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 209 4.13 1.01 4 4.27 1.48 1 5 4 -0.81 -0.49 0.07
# Histogram of the number of goals participants actually listed (1-5).
breaks <- (1:5)
goalDf_sum_wide %>%
  ggplot(aes(x = listNum)) +
  # reuse the `breaks` vector for both labels and positions instead of
  # duplicating it as seq(1, 5, by = 1) -- same values, single source of truth
  scale_x_continuous(labels = scales::comma(breaks, accuracy = 1), breaks = breaks) +
  geom_histogram(fill = "orange",
                 colour = "black",
                 binwidth = 1) +
  labs(x="Number of listed goals", y="# of participants") +
  theme_classic(base_size = 18)
number of people who listed 1 goal: 1
# Count of participants who listed exactly 1 goal.
length(goalDf_sum_wide$listNum[goalDf_sum_wide$listNum == 1])
## [1] 1
Descriptive statistics on the difference between the number of claimed goals and listed goals
# Difference between the number of claimed goals and the number actually
# listed (positive = claimed more goals than they listed).
goalDf_sum_wide$diffNum <- goalDf_sum_wide$total_goal - goalDf_sum_wide$listNum
describe(goalDf_sum_wide$diffNum)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 209 0.79 2.11 0 0.45 0 -2 10 12 1.96 4.1 0.15
# Histogram of the claimed-minus-listed goal counts (observed range -2..10).
breaks <- -2:10
diff_hist <- ggplot(goalDf_sum_wide, aes(x = diffNum)) +
  scale_x_continuous(labels = scales::comma(breaks, accuracy = 1), breaks = breaks) +
  geom_histogram(fill = "orange", colour = "black", binwidth = 1) +
  labs(x="Number of claimed goals - listed goals", y="# of participants") +
  theme_classic(base_size = 18)
diff_hist
percentage of people who listed more goals than they claimed: 11.5%
# Percentage of people who listed MORE goals than they claimed (diffNum < 0).
length(goalDf_sum_wide$diffNum[goalDf_sum_wide$diffNum <0])/totalSub *100
## [1] 11.48325
percentage of people who listed fewer goals than they claimed: 26.8%
# Percentage of people who listed FEWER goals than they claimed (diffNum > 0).
length(goalDf_sum_wide$diffNum[goalDf_sum_wide$diffNum >0])/totalSub *100
## [1] 26.79426
Even though both the median and the MAD of the difference are 0, around 37% of the participants either had to pick 5 out of all of their goals or came up with some goals on the spot. Need to be aware of possible priming or order effects.
# Descriptive stats (mean/sd/n/min/max/skew/kurtosis) for each of the 35
# rating variables, ordered from most negatively skewed upward.
# TRUE is spelled out instead of the reassignable shorthand T, consistent
# with the na.rm = TRUE calls above.
goalRating_long_R %>%
  dplyr::select(variable, rating) %>%
  group_by(variable) %>%
  summarize(mean = mean(rating, na.rm = TRUE),
            sd = sd(rating, na.rm = TRUE),
            n = n(),
            min = min(rating, na.rm = TRUE),
            max = max(rating, na.rm = TRUE),
            skew = skew(rating, na.rm = TRUE),
            kurtosi = kurtosi(rating, na.rm = TRUE)
  ) %>%
  arrange(skew)
## # A tibble: 35 x 8
## variable mean sd n min max skew kurtosi
## <chr> <dbl> <dbl> <int> <int> <int> <dbl> <dbl>
## 1 approach_avoidance_R 6.16 1.50 864 1 7 -1.95 3.09
## 2 control 6.28 1.05 864 1 7 -1.58 2.29
## 3 identified_motivation 5.97 1.24 864 1 7 -1.33 1.70
## 4 ideal_motivation 5.79 1.46 864 1 7 -1.17 0.739
## 5 importance 6.06 1.16 864 1 7 -1.14 0.675
## 6 social_desirability 5.84 1.31 864 1 7 -1.12 0.858
## 7 attractiveness_achievement 6.12 0.995 864 2 7 -1.10 0.808
## 8 instrumentality 5.57 1.56 864 1 7 -1.08 0.585
## 9 commonality 5.47 1.61 864 1 7 -1.00 0.311
## 10 initial_time_R 6.31 1.83 864 1 8 -0.982 0.260
## # … with 25 more rows
# order based on their skewness
#kable(varDf[order(varDf$skew),])
The approach_avoidance has very little variance (most people rated their goals as definitely approach goals). After changing the anchors for both the attractiveness variables, their distributions are similar to other variables such as importance and ideal_motivation. This time “control” is the most positively skewed variable other than approach_avoidance.
Should we change the approach_avoidance to ordinal?
# Histogram of ratings for every dimension, one facet per variable
# (7 rows of panels).
rating_hist <- ggplot(goalRating_long_R, aes(x = rating)) +
  geom_histogram(fill = "orange",
                 colour = "black",
                 alpha = .6) +
  facet_wrap(~variable, nrow = 7)
rating_hist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
“pairwise.complete.obs” is used for generating the correlation matrix. The correlations make sense.
# Transform the long format to wide format (one column per rating variable).
goalDf_wide <- goalRating_long_R %>% spread (variable, rating)
# Generate a correlation matrix across all 35 variables.
# NOTE(review): the advancement:visibility range relies on spread() placing
# the variable columns in alphabetical order -- confirm before replacing
# spread() with pivot_wider(), which preserves order of appearance instead.
corrM_all <- goalDf_wide %>%
dplyr :: select(advancement:visibility) %>%
cor(use = "pairwise.complete.obs")
# Visualization: upper-triangle circle plot, variables ordered by the angular
# order of eigenvectors (AOE), with coefficients printed in black.
corrplot(corrM_all, method = "circle",number.cex = .7, order = "AOE", addCoef.col = "black",type = "upper",col= colorRampPalette(c("midnightblue","white", "orange"))(200))
Only the 30 variables for goal representation are included. Only around 7% of the variance is on the between subject level.
# Subset the long dataset to only the 30 goal-representation variables
# (the 5 goal-striving variables below are excluded).
goal_striving <- c("initial_time_R", "advancement", "urgency", "effort", "commitment")
goalDf_R_long <- goalRating_long_R[!goalRating_long_R$variable %in% goal_striving,]
# Multilevel model of rating on variable with subject as random intercept.
mlm <-lmer(rating ~ variable + (1|id), data = goalDf_R_long)
# Variance partition coefficient: proportion of rating variance attributable
# to subjects (between) vs residual (within) -- i.e. the ICC.
VarCorr(mlm) %>%
as_tibble() %>%
mutate(icc=vcov/sum(vcov)) %>%
dplyr :: select(grp, icc)
## # A tibble: 2 x 2
## grp icc
## <chr> <dbl>
## 1 id 0.0694
## 2 Residual 0.931
# Keep the raw variance partition (same quantities as above) as a one-column
# data frame named "Raw" for the combined ICC table built later.
Raw <- VarCorr(mlm) %>%
as_tibble() %>%
mutate(Raw=vcov/sum(vcov)) %>%
dplyr :: select(Raw)
27 variables are included. Ordinal variables are not included: “temporal_duration”, “end_state_specificity” and “frequency”.
# Exclude the 5 variables related to goal-striving progress.
goalDf_R_wide <- goalDf_wide[,!names(goalDf_wide) %in% goal_striving]
# Exclude ordinal variables (temporal_duration, end_state_specificity,
# frequency) and other columns with irrelevant data.
goal_ordinal <- c("temporal_duration", "end_state_specificity_R", "frequency_R")
goalDf_EFA <- goalDf_R_wide[,!names(goalDf_R_wide) %in% goal_ordinal]
# Keep only the rating columns (affordance through visibility).
goalDf_EFA <- subset(goalDf_EFA, select = affordance : visibility)
# Generate the correlation matrix used for the EFA.
corrM_raw <- cor(goalDf_EFA, use = "pairwise")
Both the Very Simple Structure evaluation and parallel analysis recommend 5 factors. There are 7 factors with an eigenvalue > 1. Therefore, models with 5, 6, and 7 factors will be explored.
# Evaluate candidate factor counts with the Very Simple Structure criterion.
# NOTE(review): n.obs is set to 854 but the header says 864 goals are included
# in this analysis -- confirm which sample size is correct.
res_vss <- psych :: nfactors(corrM_raw, n = 10, rotate = "promax", diagonal = FALSE, fm = "minres",
n.obs=854,title="Very Simple Structure",use="pairwise",cor="cor")
## Loading required namespace: GPArotation
# select useful parameters and organize them into a table
cbind(1:10, res_vss$map) %>%
as.tibble() %>%
rename(., factor = V1, map = V2) %>%
cbind(., res_vss$vss.stats) %>%
select(factor, map, fit, complex, eChisq, SRMR, eCRMS, eBIC, eRMS) %>%
kable(format = "html", escape = F, caption = "VSS output after dropping 2 variables") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
## Warning: `as.tibble()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: The `x` argument of `as_tibble.matrix()` must have column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
| factor | map | fit | complex | eChisq | SRMR | eCRMS | eBIC | eRMS |
|---|---|---|---|---|---|---|---|---|
| 1 | 0.0174326 | 0.5888703 | 1.000000 | 6021.2050 | 0.1002176 | 0.1043098 | 3834.2273 | 0.1002176 |
| 2 | 0.0137984 | 0.5984743 | 1.338164 | 3373.1141 | 0.0750098 | 0.0814073 | 1361.6346 | 0.0750098 |
| 3 | 0.0130132 | 0.6800032 | 1.397882 | 1947.7908 | 0.0569998 | 0.0646317 | 105.0596 | 0.0569998 |
| 4 | 0.0137251 | 0.7076522 | 1.666287 | 1168.6496 | 0.0441514 | 0.0524202 | -512.0832 | 0.0441514 |
| 5 | 0.0142129 | 0.7360075 | 1.755238 | 720.2173 | 0.0346605 | 0.0431950 | -805.2671 | 0.0346605 |
| 6 | 0.0166403 | 0.7163339 | 1.909975 | 544.6918 | 0.0301424 | 0.0395382 | -832.2942 | 0.0301424 |
| 7 | 0.0197084 | 0.6314066 | 1.794471 | 390.9319 | 0.0255360 | 0.0353656 | -844.3055 | 0.0255360 |
| 8 | 0.0220656 | 0.6080638 | 1.860011 | 277.9775 | 0.0215331 | 0.0315985 | -822.2613 | 0.0215331 |
| 9 | 0.0243186 | 0.6192687 | 1.783783 | 187.6468 | 0.0176918 | 0.0276214 | -784.3433 | 0.0176918 |
| 10 | 0.0275343 | 0.5999162 | 1.864881 | 121.8776 | 0.0142582 | 0.0237976 | -728.6137 | 0.0142582 |
# Scree plot: identify how many factors have eigenvalues > 1, alongside the
# parallel-analysis criterion.
ev <- eigen(corrM_raw)
# NOTE(review): parallel() here is presumably nFactors::parallel (random-data
# eigenvalue simulation), not the base 'parallel' package -- confirm the
# nFactors package is attached.
ap <- parallel(subject=nrow(goalDf_EFA),var=ncol(goalDf_EFA),
rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)
Extract number of factors based on the suggestions above. Because we expect factors to be correlated with each other, we use “promax” rotation.
# Extract 5 factors (promax rotation: factors are allowed to correlate).
fa_raw_5 <-fa(r=corrM_raw, nfactors=5,n.obs = 854, rotate="promax", SMC=FALSE, fm="minres")
#fa.sort(fa_raw_5)
# Extract 6 factors.
fa_raw_6 <-fa(r=corrM_raw, nfactors=6,n.obs = 854, rotate="promax", SMC=FALSE, fm="minres")
# Extract 7 factors.
fa_raw_7 <-fa(r=corrM_raw, nfactors=7,n.obs = 854, rotate="promax", SMC=FALSE, fm="minres")
#fa.sort(fa_raw_7)
# Path diagrams of the three candidate solutions.
fa.diagram(fa_raw_5)
fa.diagram(fa_raw_6)
fa.diagram(fa_raw_7)
# Collect fit statistics for the 5/6/7-factor solutions into one data frame:
# chi-square, BIC, fit, RMSEA (point estimate), cumulative variance explained,
# and mean item complexity.
fa_fitDf <- data.frame(factors = c(5,6,7),
chi = c(fa_raw_5$chi,fa_raw_6$chi,fa_raw_7$chi),
BIC = c(fa_raw_5$BIC,fa_raw_6$BIC,fa_raw_7$BIC),
fit = c(fa_raw_5$fit,fa_raw_6$fit,fa_raw_7$fit),
RMSEA = c(fa_raw_5$RMSEA[1],fa_raw_6$RMSEA[1],fa_raw_7$RMSEA[1]),
cumVar = c(max(fa_raw_5$Vaccounted[3,]), max(fa_raw_6$Vaccounted[3,]),max(fa_raw_7$Vaccounted[3,])),
complexity = c(mean(fa_raw_5$complexity),mean(fa_raw_6$complexity),mean(fa_raw_7$complexity)))
fa_fitDf
## factors chi BIC fit RMSEA cumVar complexity
## 1 5 720.2173 -581.7295 0.8180758 0.06097121 0.4006152 1.755238
## 2 6 544.6918 -632.2759 0.8314411 0.05569833 0.4216004 1.909975
## 3 7 390.9319 -682.0644 0.8450613 0.04865439 0.4464410 1.794470
The most parsimonious model is pretty interpretable. However, more than 1/3 of the variables are clustered on the first factor. This may not be ideal when we explore between subject level variance. The model with 7 factors seems to load (less cross loading) better and explain a little more variance. However, the interfactor correlation may be too high. In order to expand the exploration into the factor variance, I’ll go with 7 factors for now. The additional 2 factors are attractiveness and visibility
Another issue to consider is the mix of unipolar and bipolar scales. Among the 27 numeric variables, the 10 bipolar variables are: social desirability, clarity, controllability, all measures about motivations, and commonality. They load on different factors in both models, so it may not be a huge issue.
Compared to the 7 factor model we saw on April 30th with 854 goals, after adding 10 more goals, “external importance” and “control” loaded with the last factor. “External importance” is loaded pretty evenly across importance, ought and visibility (0.34, 0.35, 0.37). Control is positively loaded with measurability (0.28) and negatively loaded with visibility (-0.36). The last factor is difficult to interpret in the current model.
# Organize the sorted loadings of the 7-factor model into a long data frame.
loadings <- fa.sort(fa_raw_7)$loadings
loadings <- as.data.frame(unclass(loadings))
colnames(loadings) <- c("importance", "ought", "measuarability", "attractiveness", "commonality", "attainability", "external factor")
loadings$Variables <- rownames(loadings)
loadings.m <- loadings %>% gather(-Variables, key = "Factor", value = "Loading")
# Fix facet/axis ordering via factor levels. The 'levels' argument is spelled
# out in full -- it was previously abbreviated to 'leve', which only worked
# through partial argument matching.
colOrder <- c("importance", "ought", "measuarability", "attractiveness", "commonality", "attainability", "external factor")
rowOrder <- rev(rownames(loadings))
loadings.m <- arrange(mutate(loadings.m, Variables = factor(Variables, levels = rowOrder)), Variables)
loadings.m <- arrange(mutate(loadings.m, Factor = factor(Factor, levels = colOrder)), Factor)
# Visualization
ggplot(loadings.m, aes(Variables, abs(Loading), fill=Loading)) +
  facet_wrap(~ Factor, nrow=1) + # place the factors in separate facets
  geom_bar(stat="identity") + # make the bars
  coord_flip() + # flip the axes so the variable names are horizontal
  # fill color gradient: orange = positive, blue = negative
  scale_fill_gradient2(name = "Loading",
                       high = "orange", mid = "white", low = "midnightblue",
                       midpoint=0, guide="colourbar") +
  ylab("Loading Strength") +
  ggtitle("Loadings for 7 factors") +
  theme_bw(base_size=10)
# Organize and plot the sorted loadings of the 5-factor model.
loadings <- fa.sort(fa_raw_5)$loadings
loadings <- as.data.frame(unclass(loadings))
colnames(loadings) <- c("importance", "ought", "measurability", "attainability", "commonality")
loadings$Variables <- rownames(loadings)
loadings.m <- loadings %>% gather(-Variables, key = "Factor", value = "Loading")
# 'levels' spelled out (was 'leve', relying on partial argument matching).
colOrder <- c("importance", "ought", "measurability", "attainability", "commonality")
rowOrder <- rev(rownames(loadings))
loadings.m <- arrange(mutate(loadings.m, Variables = factor(Variables, levels = rowOrder)), Variables)
loadings.m <- arrange(mutate(loadings.m, Factor = factor(Factor, levels = colOrder)), Factor)
ggplot(loadings.m, aes(Variables, abs(Loading), fill=Loading)) +
  facet_wrap(~ Factor, nrow=1) + # place the factors in separate facets
  geom_bar(stat="identity") + # make the bars
  coord_flip() + # flip the axes so the variable names are horizontal
  # fill color gradient: orange = positive, blue = negative
  scale_fill_gradient2(name = "Loading",
                       high = "orange", mid = "white", low = "midnightblue",
                       midpoint=0, guide="colourbar") +
  ylab("Loading Strength") +
  ggtitle("Loadings for 5 factors") +
  theme_bw(base_size=10)
# Organize and plot the sorted loadings of the 6-factor model.
loadings <- fa.sort(fa_raw_6)$loadings
loadings <- as.data.frame(unclass(loadings))
colnames(loadings) <- c("importance", "ought", "measurability", "attainability", "other","interrelation")
loadings$Variables <- rownames(loadings)
loadings.m <- loadings %>% gather(-Variables, key = "Factor", value = "Loading")
# 'levels' spelled out (was 'leve', relying on partial argument matching).
colOrder <- c("importance", "ought", "measurability", "attainability", "other","interrelation")
rowOrder <- rev(rownames(loadings))
loadings.m <- arrange(mutate(loadings.m, Variables = factor(Variables, levels = rowOrder)), Variables)
loadings.m <- arrange(mutate(loadings.m, Factor = factor(Factor, levels = colOrder)), Factor)
ggplot(loadings.m, aes(Variables, abs(Loading), fill=Loading)) +
  facet_wrap(~ Factor, nrow=1) + # place the factors in separate facets
  geom_bar(stat="identity") + # make the bars
  coord_flip() + # flip the axes so the variable names are horizontal
  # fill color gradient: orange = positive, blue = negative
  scale_fill_gradient2(name = "Loading",
                       high = "orange", mid = "white", low = "midnightblue",
                       midpoint=0, guide="colourbar") +
  ylab("Loading Strength") +
  ggtitle("Loadings for 6 factors") +
  theme_bw(base_size=10)
Interfactor correlations for the 7-factor model: the correlations between attractiveness & importance, commonality & importance, and ought & commonality are high.
# Interfactor correlation table for the 7-factor model: the MR components are
# renamed to their interpretable labels, rounded to 2 decimals, and rendered
# as a styled HTML table.
fa_raw_7$Phi %>%
as.data.frame() %>%
dplyr::rename(importance = MR1, ought = MR2, attractiveness = MR6, measuarability = MR3, commonality = MR7, attainability = MR5, external_factor = MR4) %>%
round(.,2) %>%
remove_rownames() %>%
mutate(factor = colnames(.)) %>%
select(factor, everything()) %>%
kable(format = "html", escape = F, caption = "Interfactor Correlation") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
| factor | importance | ought | measuarability | attractiveness | commonality | attainability | external_factor |
|---|---|---|---|---|---|---|---|
| importance | 1.00 | 0.40 | 0.41 | 0.53 | 0.50 | 0.16 | 0.23 |
| ought | 0.40 | 1.00 | 0.16 | -0.05 | 0.48 | 0.13 | 0.16 |
| measuarability | 0.41 | 0.16 | 1.00 | 0.12 | 0.11 | 0.13 | 0.05 |
| attractiveness | 0.53 | -0.05 | 0.12 | 1.00 | 0.37 | 0.15 | 0.10 |
| commonality | 0.50 | 0.48 | 0.11 | 0.37 | 1.00 | 0.05 | -0.03 |
| attainability | 0.16 | 0.13 | 0.13 | 0.15 | 0.05 | 1.00 | -0.05 |
| external_factor | 0.23 | 0.16 | 0.05 | 0.10 | -0.03 | -0.05 | 1.00 |
Drop approach_avoidance_R & attainment_maintenance_R, because these 2 variables are more relevant to the phrasing/content of a goal than to the perception of a goal. Moreover, the distributions of these 2 variables are heavily skewed toward one end of the scale.
# Regenerate the correlation matrix after dropping the two goal-phrasing
# items (attainment_maintenance_R, approach_avoidance_R).
goalDf_EFA_new <- select(goalDf_EFA, -attainment_maintenance_R, -approach_avoidance_R)
corrM_new <- cor(goalDf_EFA_new, use = "pairwise")
# Re-run the Very Simple Structure evaluation on the reduced variable set.
# NOTE(review): n.obs=854 -- confirm against the 864 goals stated above.
res_vss_new <- psych :: nfactors(corrM_new, n = 10, rotate = "promax", diagonal = FALSE, fm = "minres",
n.obs=854,title="Very Simple Structure",use="pairwise",cor="cor")
# Select useful parameters from the VSS output and organize them into a table.
# tibble() is used directly instead of the deprecated as.tibble() on an
# unnamed matrix (which triggered .name_repair compatibility warnings).
tibble(factor = 1:10, map = res_vss_new$map) %>%
  cbind(., res_vss_new$vss.stats) %>%
  select(factor, map, fit, complex, eChisq, SRMR, eCRMS, eBIC, eRMS) %>%
  kable(format = "html", escape = FALSE, caption = "VSS output after dropping 2 variables") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE, position = "center", fixed_thead = TRUE)
| factor | map | fit | complex | eChisq | SRMR | eCRMS | eBIC | eRMS |
|---|---|---|---|---|---|---|---|---|
| 1 | 0.0191176 | 0.6137210 | 1.000000 | 5471.12381 | 0.1033317 | 0.1079265 | 3614.892733 | 0.1033317 |
| 2 | 0.0146593 | 0.6307921 | 1.309728 | 2886.75712 | 0.0750586 | 0.0820586 | 1192.524393 | 0.0750586 |
| 3 | 0.0140183 | 0.6867211 | 1.329494 | 1529.86393 | 0.0546414 | 0.0626780 | -9.120379 | 0.0546414 |
| 4 | 0.0159731 | 0.7296040 | 1.479139 | 1030.29860 | 0.0448412 | 0.0541133 | -360.187222 | 0.0448412 |
| 5 | 0.0163419 | 0.7394770 | 1.636797 | 593.55224 | 0.0340349 | 0.0433411 | -655.185034 | 0.0340349 |
| 6 | 0.0189263 | 0.6650507 | 1.796159 | 423.21058 | 0.0287391 | 0.0387518 | -690.528070 | 0.0287391 |
| 7 | 0.0216460 | 0.6073174 | 1.801746 | 284.33079 | 0.0235563 | 0.0337669 | -701.159164 | 0.0235563 |
| 8 | 0.0242856 | 0.6016939 | 1.665662 | 178.00597 | 0.0186386 | 0.0285344 | -685.985223 | 0.0186386 |
| 9 | 0.0278024 | 0.5964622 | 1.762507 | 113.92879 | 0.0149112 | 0.0245138 | -635.313575 | 0.0149112 |
| 10 | 0.0334881 | 0.6567501 | 1.705905 | 78.67142 | 0.0123909 | 0.0220193 | -562.572045 | 0.0123909 |
# Scree plot on the reduced correlation matrix: factors with eigenvalues > 1
# plus the parallel-analysis criterion.
ev <- eigen(corrM_new)
# NOTE(review): parallel() is presumably nFactors::parallel -- confirm.
ap <- parallel(subject=nrow(goalDf_EFA_new),var=ncol(goalDf_EFA_new),
rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)
Based on VSS & parallel analysis, a 5-factor model is recommended. There are 6 factors with an eigenvalue > 1.
# Extract 5 factors from the reduced variable set (promax rotation).
fa_new_5 <-fa(r=corrM_new, nfactors=5,n.obs = 854, rotate="promax", SMC=FALSE, fm="minres")
#fa.sort(fa_raw_5)
# Extract 6 factors.
fa_new_6 <-fa(r=corrM_new, nfactors=6,n.obs = 854, rotate="promax", SMC=FALSE, fm="minres")
# Path diagram of the 5-factor solution.
fa.diagram(fa_new_5)
Compared to the 5 factor model prior to dropping the 2 variables, the current model maintains the same factors. The importance factor includes an additional variable – intrinsic_motivation, which used to be negatively loaded with ought. The measurability factor includes an additional variable – affordance, which used to load positively with attainability. Social desirability used to load positively with importance, and now loads with commonality.
# Organize and plot the sorted loadings of the new 5-factor model.
loadings <- fa.sort(fa_new_5)$loadings
loadings <- as.data.frame(unclass(loadings))
colnames(loadings) <- c("importance", "ought", "measurability", "commonality", "attainability")
loadings$Variables <- rownames(loadings)
loadings.m <- loadings %>% gather(-Variables, key = "Factor", value = "Loading")
# 'levels' spelled out (was 'leve', relying on partial argument matching).
colOrder <- c("importance", "ought", "measurability", "commonality", "attainability")
rowOrder <- rev(rownames(loadings))
loadings.m <- arrange(mutate(loadings.m, Variables = factor(Variables, levels = rowOrder)), Variables)
loadings.m <- arrange(mutate(loadings.m, Factor = factor(Factor, levels = colOrder)), Factor)
ggplot(loadings.m, aes(Variables, abs(Loading), fill=Loading)) +
  facet_wrap(~ Factor, nrow=1) + # place the factors in separate facets
  geom_bar(stat="identity") + # make the bars
  coord_flip() + # flip the axes so the variable names are horizontal
  # fill color gradient: orange = positive, blue = negative
  scale_fill_gradient2(name = "Loading",
                       high = "orange", mid = "white", low = "midnightblue",
                       midpoint=0, guide="colourbar") +
  ylab("Loading Strength") +
  ggtitle("Loadings for 5 factors") +
  theme_bw(base_size=10)
# Interfactor correlation table for the new 5-factor model.
# as_tibble() replaces the deprecated as.tibble(); Phi carries MR column
# names, so no name repair is needed.
fa_new_5$Phi %>%
  as_tibble() %>%
  dplyr::rename(importance = MR1, ought = MR2, measuarability = MR3, commonality = MR5, attainability = MR4) %>%
  round(., 2) %>%
  remove_rownames() %>%
  mutate(factor = colnames(.)) %>%
  select(factor, everything()) %>%
  kable(format = "html", escape = FALSE, caption = "Interfactor Correlation") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE, position = "center", fixed_thead = TRUE)
| factor | importance | ought | measuarability | commonality | attainability |
|---|---|---|---|---|---|
| importance | 1.00 | 0.32 | 0.31 | 0.15 | 0.25 |
| ought | 0.32 | 1.00 | 0.22 | 0.10 | -0.06 |
| measuarability | 0.31 | 0.22 | 1.00 | 0.11 | 0.18 |
| commonality | 0.15 | 0.10 | 0.11 | 1.00 | -0.05 |
| attainability | 0.25 | -0.06 | 0.18 | -0.05 | 1.00 |
fa.diagram(fa_new_6)
Compared to the 5 factor model (after dropping the variables), the additional factor is instrumentality (can’t find a good way to label this factor), in which all 4 variables switched from the importance factor. For the ought factor, introjected_motivation was replaced by visibility, which used to load with importance. Introjected_motivation switched to the commonality factor. Affordance used to load with measurability, and now loads with attainability.
# Organize and plot the sorted loadings of the new 6-factor model.
loadings <- fa.sort(fa_new_6)$loadings
loadings <- as.data.frame(unclass(loadings))
colnames(loadings) <- c("importance", "ought", "instrumentality", "measurability", "commonality", "attainability")
loadings$Variables <- rownames(loadings)
loadings.m <- loadings %>% gather(-Variables, key = "Factor", value = "Loading")
# 'levels' spelled out (was 'leve', relying on partial argument matching).
colOrder <- c("importance", "ought", "instrumentality", "measurability", "commonality", "attainability")
rowOrder <- rev(rownames(loadings))
loadings.m <- arrange(mutate(loadings.m, Variables = factor(Variables, levels = rowOrder)), Variables)
loadings.m <- arrange(mutate(loadings.m, Factor = factor(Factor, levels = colOrder)), Factor)
ggplot(loadings.m, aes(Variables, abs(Loading), fill=Loading)) +
  facet_wrap(~ Factor, nrow=1) + # place the factors in separate facets
  geom_bar(stat="identity") + # make the bars
  coord_flip() + # flip the axes so the variable names are horizontal
  # fill color gradient: orange = positive, blue = negative
  scale_fill_gradient2(name = "Loading",
                       high = "orange", mid = "white", low = "midnightblue",
                       midpoint=0, guide="colourbar") +
  ylab("Loading Strength") +
  ggtitle("Loadings for 6 factors") +
  theme_bw(base_size=10)
The correlation between the new factor and the others are pretty high except with attainability, which indicates this model may not be a good choice.
# Interfactor correlation table for the new 6-factor model.
# as_tibble() replaces the deprecated as.tibble().
fa_new_6$Phi %>%
  as_tibble() %>%
  dplyr::rename(importance = MR1, ought = MR2, instrumentality = MR6, measuarability = MR3, commonality = MR5, attainability = MR4) %>%
  round(., 2) %>%
  remove_rownames() %>%
  mutate(factor = colnames(.)) %>%
  select(factor, everything()) %>%
  kable(format = "html", escape = FALSE, caption = "Interfactor Correlation") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE, position = "center", fixed_thead = TRUE)
| factor | importance | ought | instrumentality | measuarability | commonality | attainability |
|---|---|---|---|---|---|---|
| importance | 1.00 | 0.20 | 0.59 | 0.16 | 0.17 | 0.24 |
| ought | 0.20 | 1.00 | 0.39 | 0.14 | 0.19 | -0.10 |
| instrumentality | 0.59 | 0.39 | 1.00 | 0.37 | 0.44 | 0.12 |
| measuarability | 0.16 | 0.14 | 0.37 | 1.00 | 0.20 | 0.18 |
| commonality | 0.17 | 0.19 | 0.44 | 0.20 | 1.00 | -0.02 |
| attainability | 0.24 | -0.10 | 0.12 | 0.18 | -0.02 | 1.00 |
# Compare fit statistics across the four candidate models:
# 5/6 factors with all 27 variables vs 5/6 factors after dropping 2 variables.
fa_fitCom <- data.frame(factors = c(5,6,5,6),
totalVar = c(27,27,25,25),
chi = c(fa_raw_5$chi,fa_raw_6$chi,fa_new_5$chi,fa_new_6$chi),
EBIC = c(fa_raw_5$EBIC,fa_raw_6$EBIC,fa_new_5$EBIC,fa_new_6$EBIC),
fit = c(fa_raw_5$fit,fa_raw_6$fit,fa_new_5$fit,fa_new_6$fit),
RMSEA = c(fa_raw_5$RMSEA[1],fa_raw_6$RMSEA[1],fa_new_5$RMSEA[1],fa_new_6$RMSEA[1]),
cumVar = c(max(fa_raw_5$Vaccounted[3,]), max(fa_raw_6$Vaccounted[3,]),max(fa_new_5$Vaccounted[3,]),max(fa_new_6$Vaccounted[3,])),
complexity = c(mean(fa_raw_5$complexity),mean(fa_raw_6$complexity),mean(fa_new_5$complexity),mean(fa_new_6$complexity)))
fa_fitCom
## factors totalVar chi EBIC fit RMSEA cumVar complexity
## 1 5 27 720.2173 -805.2671 0.8180758 0.06097121 0.4006152 1.755238
## 2 6 27 544.6918 -832.2942 0.8314411 0.05569833 0.4216004 1.909975
## 3 5 25 593.5522 -655.1850 0.8379838 0.06335572 0.4238826 1.636797
## 4 6 25 423.2106 -690.5281 0.8518192 0.05727817 0.4460223 1.796159
Generate the factor score based on the mean
# Build factor scores as the mean of each factor's constituent items.
# difficulty is reverse-scored (8 - x, for a 1-7 scale) so that higher
# values mean more attainable before averaging into Attainability.
factorScoreDf <- goalDf_R_wide %>%
mutate(difficulty = 8 - difficulty) %>%
mutate(Importance = rowMeans(select(.,instrumentality, ideal_motivation, connectedness, meaningfulness, identified_motivation, importance, construal_level,attractiveness_achievement,attractiveness_progress,intrinsic_motivation,basic_needs, visibility),na.rm = T),
Ought = rowMeans(select(., ought_motivation, external_motivation ,external_importance, introjected_motivation), na.rm = T),
Measurability = rowMeans(select(., measurability , clarity , specificity , control, affordance), na.rm = T),
Commonality = rowMeans(select(., commonality , social_desirability), na.rm = T),
Attainability = rowMeans(select(., difficulty , attainability), na.rm = T)) %>%
select(id, list_goal = listNum, claim_goal = total_goal, goal_order = goal, Importance, Ought, Measurability, Commonality, Attainability)
# ICC (between- vs within-subject variance partition) for each factor score.
# The five identical copy-pasted model/VarCorr blocks are replaced by a
# single helper applied to each score name.
#
# icc_partition: fit an intercept-only multilevel model of `score` with a
# subject random intercept, and return a one-column data frame (named after
# the score) holding the variance proportions: between-subject first, then
# residual (within-subject), matching VarCorr()'s grouping order.
icc_partition <- function(score) {
  model <- lmer(as.formula(paste(score, "~ 1 + (1|id)")), data = factorScoreDf)
  partition <- VarCorr(model) %>%
    as_tibble() %>%
    mutate(icc = vcov / sum(vcov)) %>%
    dplyr::select(icc)
  names(partition) <- score
  partition
}
Importance <- icc_partition("Importance")
Ought <- icc_partition("Ought")
Measurability <- icc_partition("Measurability")
Commonality <- icc_partition("Commonality")
Attainability <- icc_partition("Attainability")
# Combine the per-factor partitions (plus the raw-rating partition computed
# earlier as `Raw`) into one data frame.
factorScore_icc <- data.frame("variation" = c("between subject", "within subject"))
factorScore_icc <- bind_cols(factorScore_icc, Importance, Ought, Measurability, Commonality, Attainability, Raw)
# Reshape the ICC table to long format and plot the between/within variance
# split for each factor as a stacked bar chart.
factorV <- factorScore_icc %>% gather(-variation, key = factor, value = ICC)
icc_plot <- ggplot(factorV, aes(fill = variation, y = ICC, x = factor)) +
  geom_bar(position = "stack", stat = "identity", alpha = .8) +
  scale_fill_manual(values = c("dark blue", "orange")) +
  ggtitle("ICC output") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
icc_plot
It’s interesting to see that attainability doesn’t correlate much with goal pursuit variables except advancement and advancement doesn’t correlate with the rest of factors either. Conversely, urgency, commitment and effort are highly correlated with importance, measurability.
# Combine goal-level factor scores with the 5 goal-striving variables.
goalLevelDf <- cbind(select(factorScoreDf, "goal_order" : "Attainability"), goalDf_wide[,c("advancement", "commitment", "effort", "urgency", "initial_time_R")])
# Correlation matrix and pairwise significance tests (95% confidence level).
corrM_goalLevel <- cor(goalLevelDf,use = "pairwise")
res1 <- cor.mtest(goalLevelDf, conf.level = .95)
# Visualization: upper-triangle circle plot; correlations failing the
# sig.level = .2 threshold are marked via p.mat.
corrplot(corrM_goalLevel, method = "circle",number.cex = .7, order = "AOE", addCoef.col = "black",type = "upper",col= colorRampPalette(c("midnightblue","white", "orange"))(200),p.mat = res1$p, sig.level = .2, tl.col = c("black", "midnightblue", "darkorange1", "darkorange1", "darkorange1", "midnightblue", "midnightblue","midnightblue","darkorange1", "midnightblue","darkorange1"))
# Average score for each factor within each subject.
# NOTE(review): aggregate()'s formula interface drops rows with any NA
# (na.action = na.omit by default) -- confirm that is intended here.
subjectDf_factor <- aggregate(. ~ id, data = factorScoreDf, mean)
# SD for each factor within each subject. list_goal / claim_goal are constant
# within subject, so their (zero) SDs are dropped and the subject-level values
# are re-attached from the means table.
subjectDf_factor_sd <- aggregate(. ~ id, data = factorScoreDf, sd) %>% select(-list_goal, -claim_goal)
subjectDf_factor_sd <- subjectDf_factor %>% select(id, list_goal, claim_goal) %>%
left_join(subjectDf_factor_sd, by = "id")
This section is for calculating the pairwise distance between goals within each subject. Because the dimensions are not orthogonal, we use the Mahalanobis distance to estimate the distance across all dimensions.
We use the covariance matrix across all goal ratings to calculate pair-wise distance. Subjects who only have 1 goal are excluded from this analysis
# Pairwise Mahalanobis distance between every pair of rows (goals) in df,
# using the supplied covariance matrix `cov`.
# Returns a numeric vector of choose(nrow(df), 2) squared distances, or NULL
# when the computation fails (e.g. fewer than 2 rows, so combn() errors).
mahalanobisFun <- function(df, cov) {
  # The computation must sit INSIDE tryCatch(): previously combn() was
  # evaluated at assignment, so `tryCatch(MD, ...)` could never catch its
  # errors and the function would error out instead of returning NULL.
  tryCatch(
    combn(nrow(df), 2, function(x) mahalanobis(as.matrix(df[x[1], ]), center = as.matrix(df[x[2], ]), cov = cov)),
    error = function(e) NULL
  )
}
Exclude subjects with only one goal
# Exclude subjects with only one goal (no pair can be formed).
id_oneGoal <- goalDf_wide$id[goalDf_wide$listNum ==1]
factorScoreDf_clean <- factorScoreDf %>% filter(!id %in% id_oneGoal) %>% select(-list_goal, -claim_goal, -goal_order)
# Split the dataset by subject ID, then drop the ID column from each piece.
splitDf <- split( factorScoreDf_clean, f = factorScoreDf_clean$id)
#splitDf <- split( factorScoreDf, f = factorScoreDf$id)
splitDf <- lapply(splitDf, function(x) subset(x, select = -id))
# Covariance matrix of factor scores across ALL goals (shared by every
# subject's distance computation).
# NOTE(review): cov() defaults to use = "everything"; NA factor scores would
# propagate into the matrix -- confirm the scores are complete.
factor_cov <- cov(subset(factorScoreDf_clean, select = -id))
#factor_cov <- cov(subset(factorScoreDf, select = -id))
# Apply the pairwise Mahalanobis distance function to each subject.
output <- lapply(splitDf, function(x) mahalanobisFun(x, factor_cov))
Average number of pairs per subject: 7. 15 subjects only have 1 pair.
# flatten all pairwise distances into one vector
distance_M <- unlist(output)
# number of pairs each subject contributes (type-stable vapply instead of
# lapply + unlist)
pairNum <- vapply(output, function(x) length(as.vector(x)), integer(1))
mean(pairNum)
## [1] 7.014423
# generate a pairwise data frame: one row per goal pair, labelled with the
# subject who owns the pair (each id repeated by its number of pairs)
id <- unique(factorScoreDf_clean$id)
id_pair <- unlist(mapply(rep, id, pairNum))
# pairId <- unlist(mapply(seq,1,pairNum))
# NOTE(review): this assumes unique() yields ids in the same order as the
# names of `output` (split() sorts by id) -- confirm if ids are not sorted
pairDf_M <- data.frame("subject_id" = id_pair,
"distance_M" = distance_M)
The distance is positively skewed (skew = 1.47), with a long right tail.
The pair of goals with the least distance is “Grow my relationship”, “Grow with friends and family”; The pair of goals with the most distance is “My first goal is to get my college degree on time”, “My fourth goal is to be more present in everyday life and to stop worrying about things I cannot control”.
# descriptive of all pairwise distance
# (describe() -- presumably psych::describe; confirm which package is attached)
describe(pairDf_M$distance_M)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1459 8.04 6.15 6.44 7.16 4.89 0.07 37.03 36.96 1.47 2.45 0.16
# histogram of all pairwise Mahalanobis distances
ggplot(pairDf_M, aes(x = distance_M)) +
  geom_histogram(fill = "orange", colour = "black", alpha = .6)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Around 30% of the variance in distance is at the between-subject level.
# generate a multilevel model with subject as the random intercept
# (intercept-only model, used solely to partition the variance)
mlm <-lmer(distance_M ~ 1 + (1|subject_id), data = pairDf_M)
# calculate the variance partition coefficient and transform to ICC
# icc = each variance component divided by the total variance
VarCorr(mlm) %>%
as_tibble() %>%
mutate(icc=vcov/sum(vcov)) %>%
dplyr :: select(grp, icc)
## # A tibble: 2 x 2
## grp icc
## <chr> <dbl>
## 1 subject_id 0.301
## 2 Residual 0.699
# calculate mean distance per subject
distDf_perSub_M <- pairDf_M %>%
group_by(subject_id) %>%
mutate(distMean = mean(distance_M),
distSd = sd(distance_M)) %>%
dplyr :: select(-distance_M)
# collapse to one row per subject (mutate + de-duplication instead of
# summarise); distSd is NA for subjects with only a single pair
distDf_perSub_M <- distDf_perSub_M[!duplicated(distDf_perSub_M$subject_id),]
# descriptive of subject-level mean distance
describe(distDf_perSub_M$distMean)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 208 7.9 4.07 7.03 7.57 3.46 0.24 26.1 25.86 0.92 1.26 0.28
# histogram of the per-subject mean distance
ggplot(distDf_perSub_M, aes(x = distMean)) +
  geom_histogram(fill = "orange", colour = "black", alpha = .6)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# descriptive of subject-level distance sd
# (n is smaller than for distMean because distSd is NA for single-pair subjects)
describe(distDf_perSub_M$distSd)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 193 4.33 2.64 3.98 4.06 2.24 0.41 12.73 12.32 0.92 0.49 0.19
# histogram of the per-subject distance sd
ggplot(distDf_perSub_M, aes(x = distSd)) +
  geom_histogram(fill = "orange", colour = "black", alpha = .6)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Both the average and variance of the distance are negatively correlated with most of the average factor scores, indicating that subjects with goals that are on average more important, common, externally motivated and attractive perceive all their goals in a more similar way.
# join per-subject factor means with per-subject distance mean/sd
subjectDf <- left_join(subjectDf_factor, distDf_perSub_M, by = c("id" = "subject_id")) %>%
select(-goal_order)
# generate a correlational matrix
corrM_factorMean <- cor(subjectDf[,-1],use = "pairwise")
res1 <- cor.mtest(subjectDf[,-1], conf.level = .95)
# rows c(8,9) = distMean/distSd, cols 3:7 = the five factor means
# (position-based indexing -- fragile if columns change; TODO confirm)
p_value <- as.data.frame(t(res1$p[c(8,9),3:7]))
# generate and present results in a table
as.data.frame(t(corrM_factorMean[c(8,9),3:7])) %>%
round(.,2) %>%
mutate(factor_mean = row.names(.)) %>%
select(factor_mean, everything()) %>%
bind_cols(p_value) %>%
mutate(
distMean = cell_spec(distMean, "html", color = ifelse(V1 < 0.05, "darkorange", "black")),
distSd = cell_spec(distSd, "html", color = ifelse(V2 < 0.05, "darkorange", "black"))
) %>%
select(-V1,-V2) %>%
kable(format = "html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center")
| factor_mean | distMean | distSd |
|---|---|---|
| Importance | -0.21 | -0.18 |
| Ought | -0.22 | -0.24 |
| Measurability | -0.16 | -0.12 |
| Commonality | -0.3 | -0.33 |
| Attainability | -0.04 | -0.05 |
The 5 factors contribute roughly equally to the average degree and variance of the distance
# combine subject-level factor score sd with the mean and sd of pairwise
# distance (no individual-difference measures are joined in this block)
subjectDf_sd <- left_join(subjectDf_factor_sd, distDf_perSub_M, by = c("id" = "subject_id")) %>%
select(-goal_order)
# generate a correlational matrix
corrM_factorSd <- cor(subjectDf_sd[,-1],use = "pairwise")
res1 <- cor.mtest(subjectDf_sd[,-1], conf.level = .95)
# rows c(8,9) = distMean/distSd, cols 3:7 = the five factor sds
# (position-based indexing -- fragile if columns change; TODO confirm)
p_value <- as.data.frame(t(res1$p[c(8,9),3:7]))
# generate and present results in a table
as.data.frame(t(corrM_factorSd[c(8,9),3:7])) %>%
round(.,2) %>%
mutate(factor_sd = row.names(.)) %>%
select(factor_sd, everything()) %>%
bind_cols(p_value) %>%
mutate(
distMean = cell_spec(distMean, "html", color = ifelse(V1 < 0.05, "darkorange", "black")),
distSd = cell_spec(distSd, "html", color = ifelse(V2 < 0.05, "darkorange", "black"))
) %>%
select(-V1,-V2) %>%
kable(format = "html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center")
| factor_sd | distMean | distSd |
|---|---|---|
| Importance | 0.48 | 0.33 |
| Ought | 0.52 | 0.34 |
| Measurability | 0.5 | 0.4 |
| Commonality | 0.6 | 0.55 |
| Attainability | 0.53 | 0.43 |
Neither the average nor the variance of the distance shows clear correlations with the individual measures. The number of claimed goals is positively correlated with distance variance, but the number of listed goals doesn't show the same relationship. Some subscales from the “Goal orientation” scale and the “Contingencies of Self-Worth Scale” correlate with distance, but the relationships are very weak.
# join individual-difference measures with per-subject distance mean/sd,
# then prepend the goal counts from the factor-mean table
subjectDf <- left_join(indivDiffDf, distDf_perSub_M, by = c("id" = "subject_id"))
subjectDf <- subjectDf_factor %>%
select(id, list_goal, claim_goal) %>%
left_join(subjectDf, by = "id")
# generate a correlational matrix
corrM_im <- cor(subjectDf[,-1],use = "pairwise")
res1 <- cor.mtest(subjectDf[,-1], conf.level = .95)
# rows c(24,25) = distMean/distSd, cols 1:23 = the individual measures
# (position-based indexing -- TODO confirm if columns are added or reordered)
p_value <- as.data.frame(t(res1$p[c(24,25),1:23]))
# generate and present results in a table
as.data.frame(t(corrM_im[c(24,25),1:23])) %>%
round(.,2) %>%
mutate(measures = row.names(.)) %>%
select(measures, everything()) %>%
bind_cols(p_value) %>%
mutate(
distMean = cell_spec(distMean, "html", color = ifelse(V1 < 0.05, "darkorange", "black")),
distSd = cell_spec(distSd, "html", color = ifelse(V2 < 0.05, "darkorange", "black"))
) %>%
select(-V1,-V2) %>%
kable(format = "html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
| measures | distMean | distSd |
|---|---|---|
| list_goal | 0.1 | 0.2 |
| claim_goal | 0.11 | 0.14 |
| Extraversion_mean | -0.04 | -0.04 |
| Agreeableness_mean | -0.09 | -0.06 |
| Conscientiousness_mean | 0.05 | -0.01 |
| Neuroticism_mean | 0.05 | 0.06 |
| OpenMindedness_mean | -0.03 | -0.02 |
| BSCS_mean | 0.01 | 0.07 |
| GOS_learning | 0.13 | 0.15 |
| GOS_avoidance | -0.02 | -0.03 |
| GOS_prove | -0.08 | -0.12 |
| GSE_mean | -0.01 | -0.01 |
| LET_mean | -0.05 | -0.07 |
| PS_mean | 0.12 | 0.03 |
| RSE_mean | 0.01 | 0.04 |
| SWL_mean | -0.06 | -0.04 |
| family_mean | -0.02 | -0.07 |
| competetion_mean | 0.12 | 0.1 |
| appearance_mean | 0 | 0.01 |
| god_mean | -0.18 | -0.11 |
| academic_mean | -0.08 | -0.03 |
| virtue_mean | -0.11 | -0.05 |
| approval_mean | 0.04 | 0.02 |
Attainability positively correlates with agreeableness, conscientiousness, self-efficacy, life engagement, and planfulness, and negatively correlates with neuroticism. Self-esteem and subjective well-being show a similar pattern: positively correlated with attainability and negatively correlated with ought.
# join individual-difference measures with the per-subject factor means
subjectDf <- left_join(indivDiffDf, subjectDf_factor, by = "id") %>% select(-goal_order, -id)
# generate a correlational matrix
corrM_factorMean_im <- cor(subjectDf,use = "pairwise")
res1 <- cor.mtest(subjectDf, conf.level = .95)
# rows 24:28 = the five factor means, cols 1:23 = individual measures
# (position-based indexing -- TODO confirm against the column layout)
p_value <- as.data.frame(t(res1$p[24:28,1:23]))
# generate and present results in a table
as.data.frame(t(corrM_factorMean_im[24:28,1:23])) %>%
round(.,2) %>%
mutate(measures = row.names(.)) %>%
select(measures, everything()) %>%
bind_cols(p_value) %>%
mutate(
Importance = cell_spec(Importance, "html", color = ifelse(V1 < 0.05, "darkorange", "black")),
Ought = cell_spec(Ought, "html", color = ifelse(V2 < 0.05, "darkorange", "black")),
Measurability = cell_spec(Measurability, "html", color = ifelse(V3 < 0.05, "darkorange", "black")),
Commonality = cell_spec(Commonality, "html", color = ifelse(V4 < 0.05, "darkorange", "black")),
Attainability = cell_spec(Attainability, "html", color = ifelse(V5 < 0.05, "darkorange", "black"))) %>%
# drops the helper columns V1..V5; NOTE(review): contains() is case-insensitive
# by default, so any column with "v" in its name would also be dropped
select(-contains("V")) %>%
kable(format = "html", escape = F, caption = "Correlations between the average factor scores and individual measures") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
| measures | Importance | Ought | Measurability | Commonality | Attainability |
|---|---|---|---|---|---|
| Extraversion_mean | 0.15 | -0.02 | 0.14 | 0.05 | 0.07 |
| Agreeableness_mean | 0.14 | 0.01 | 0.1 | 0.09 | 0.18 |
| Conscientiousness_mean | 0.09 | -0.06 | 0.05 | -0.01 | 0.18 |
| Neuroticism_mean | -0.11 | 0.04 | -0.11 | -0.13 | -0.26 |
| OpenMindedness_mean | 0.16 | 0.09 | 0.01 | 0.05 | 0.05 |
| BSCS_mean | 0.07 | -0.04 | 0.16 | -0.09 | 0.24 |
| GOS_learning | 0.18 | -0.1 | 0.29 | -0.06 | 0.17 |
| GOS_avoidance | -0.03 | 0.23 | -0.13 | -0.04 | -0.14 |
| GOS_prove | 0.24 | 0.19 | 0.08 | 0.06 | -0.07 |
| GSE_mean | 0.21 | 0.03 | 0.25 | 0.15 | 0.31 |
| LET_mean | 0.27 | -0.1 | 0.32 | 0.13 | 0.21 |
| PS_mean | 0.15 | -0.02 | 0.21 | 0.02 | 0.17 |
| RSE_mean | 0.11 | -0.2 | 0.18 | 0.08 | 0.27 |
| SWL_mean | 0.13 | -0.11 | 0.16 | 0.11 | 0.29 |
| family_mean | 0.17 | 0.14 | 0.15 | 0.01 | 0.01 |
| competetion_mean | 0.01 | 0.1 | 0.04 | -0.05 | -0.1 |
| appearance_mean | -0.08 | 0 | 0 | -0.07 | -0.14 |
| god_mean | 0.19 | 0.09 | 0.11 | 0.16 | -0.01 |
| academic_mean | 0.13 | 0.14 | 0.09 | 0.05 | -0.21 |
| virtue_mean | 0.23 | 0.05 | 0.16 | 0.17 | 0.05 |
| approval_mean | 0 | 0.14 | -0.07 | -0.08 | -0.2 |
| list_goal | 0.02 | -0.11 | -0.14 | -0.05 | -0.04 |
| claim_goal | 0.06 | -0.09 | -0.06 | -0.07 | 0.03 |
The number of listed goals is correlated with variance in measurability, commonality and attainability; the number of claimed goals is not.
# join individual-difference measures with the per-subject factor score sds
subjectDf <- left_join(indivDiffDf, subjectDf_factor_sd, by = "id") %>% select(-goal_order, -id)
# generate a correlational matrix
corrM_factorSd_im <- cor(subjectDf,use = "pairwise")
res1 <- cor.mtest(subjectDf, conf.level = .95)
# rows 24:28 = the five factor sds, cols 1:23 = individual measures
# (position-based indexing -- TODO confirm against the column layout)
p_value <- as.data.frame(t(res1$p[24:28,1:23]))
# generate and present results in a table
as.data.frame(t(corrM_factorSd_im[24:28,1:23])) %>%
round(.,2) %>%
mutate(measures = row.names(.)) %>%
select(measures, everything()) %>%
bind_cols(p_value) %>%
mutate(
Importance = cell_spec(Importance, "html", color = ifelse(V1 < 0.05, "darkorange", "black")),
Ought = cell_spec(Ought, "html", color = ifelse(V2 < 0.05, "darkorange", "black")),
Measurability = cell_spec(Measurability, "html", color = ifelse(V3 < 0.05, "darkorange", "black")),
Commonality = cell_spec(Commonality, "html", color = ifelse(V4 < 0.05, "darkorange", "black")),
Attainability = cell_spec(Attainability, "html", color = ifelse(V5 < 0.05, "darkorange", "black"))) %>%
# drops the helper columns V1..V5 (contains() is case-insensitive by default)
select(-contains("V")) %>%
kable(format = "html", escape = F, caption = "Correlations between the factor score sd and individual measures") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
| measures | Importance | Ought | Measurability | Commonality | Attainability |
|---|---|---|---|---|---|
| Extraversion_mean | -0.08 | -0.14 | -0.08 | -0.04 | 0.06 |
| Agreeableness_mean | -0.01 | -0.12 | -0.05 | -0.04 | 0.01 |
| Conscientiousness_mean | 0.04 | 0 | -0.05 | 0 | -0.05 |
| Neuroticism_mean | -0.01 | 0 | 0.06 | 0.08 | 0.07 |
| OpenMindedness_mean | -0.06 | -0.02 | 0.01 | 0.07 | 0.05 |
| BSCS_mean | -0.02 | -0.06 | -0.06 | 0.07 | -0.05 |
| GOS_learning | 0 | 0.01 | -0.05 | 0.2 | 0.05 |
| GOS_avoidance | -0.08 | 0.04 | -0.04 | 0.06 | 0 |
| GOS_prove | -0.17 | 0 | -0.1 | 0 | 0.07 |
| GSE_mean | -0.07 | -0.07 | -0.07 | 0.02 | -0.03 |
| LET_mean | -0.02 | 0.05 | -0.18 | -0.03 | 0.07 |
| PS_mean | 0.07 | 0.03 | -0.05 | 0.11 | 0 |
| RSE_mean | -0.05 | -0.01 | -0.09 | 0.01 | 0 |
| SWL_mean | 0 | -0.07 | -0.11 | -0.02 | 0.03 |
| family_mean | -0.05 | -0.08 | 0.06 | 0.09 | 0.07 |
| competetion_mean | -0.11 | 0.02 | 0.11 | 0.11 | 0.1 |
| appearance_mean | 0.05 | -0.07 | 0.08 | 0.05 | 0.01 |
| god_mean | -0.12 | -0.12 | -0.06 | -0.08 | 0.04 |
| academic_mean | -0.09 | -0.11 | 0.02 | 0.04 | 0.12 |
| virtue_mean | -0.08 | -0.06 | 0.02 | -0.05 | 0.02 |
| approval_mean | -0.04 | 0.03 | 0.12 | 0.06 | -0.04 |
| list_goal | 0.09 | 0.05 | 0.25 | 0.17 | 0.1 |
| claim_goal | 0.14 | 0.06 | 0.14 | 0.11 | 0.08 |
Exclude outliers
# extract outlier IDs for distMean: subjects more than 3 sd above the mean.
# (Renamed the local `sd` variable to `s` so it no longer shadows base::sd.)
m <- mean(distDf_perSub_M$distMean, na.rm = TRUE)
s <- sd(distDf_perSub_M$distMean, na.rm = TRUE)
# which() drops NA comparisons, so missing values cannot leak NA entries
# into the outlier ID vectors
outlier_mean <- distDf_perSub_M$subject_id[which(distDf_perSub_M$distMean > m + 3 * s)]
# extract outlier IDs for distSd (same 3-sd rule; distSd is NA for subjects
# with only one pair, so the NA-safe indexing matters here)
m <- mean(distDf_perSub_M$distSd, na.rm = TRUE)
s <- sd(distDf_perSub_M$distSd, na.rm = TRUE)
outlier_sd <- distDf_perSub_M$subject_id[which(distDf_perSub_M$distSd > m + 3 * s)]
Correlation between distMean and all other measures
# rebuild the subject-level data frame without the distMean outliers
subjectDf <- left_join(subjectDf_factor, distDf_perSub_M, by = c("id" = "subject_id")) %>%
left_join(indivDiffDf, by = "id") %>%
filter(! id %in% outlier_mean) %>%
select(-id,-goal_order)
# generate a correlational matrix
corrM_distMean <- cor(subjectDf,use = "pairwise")
res1 <- cor.mtest(subjectDf, conf.level = .95)
# extract p value
# column 8 = distMean (position-based -- TODO confirm if columns change)
p <- res1$p[,8]
p_value <- as.data.frame(p)
# generate table
distMean <-corrM_distMean[,"distMean"]
meanCorrDf <- as.data.frame(distMean) %>%
round(.,2) %>%
mutate(measures = row.names(.)) %>%
select(measures, everything()) %>%
bind_cols(p_value) %>%
mutate(
distMean = cell_spec(distMean, "html", color = ifelse(p < 0.05, "darkorange", "black"))) %>%
filter(!measures %in% c("distMean", "distSd")) %>%
select(-p)
Correlation between distSd and all other measures
# rebuild the subject-level data frame without the distSd outliers
subjectDf <- left_join(subjectDf_factor, distDf_perSub_M, by = c("id" = "subject_id")) %>%
left_join(indivDiffDf, by = "id") %>%
filter(! id %in% outlier_sd) %>%
select(-id,-goal_order)
# generate a correlational matrix
corrM_distSd <- cor(subjectDf,use = "pairwise")
res1 <- cor.mtest(subjectDf, conf.level = .95)
# extract p value (column 9 = distSd; position-based -- TODO confirm)
p <- res1$p[,9]
p_value <- as.data.frame(p)
# generate table
# BUG FIX: this previously read corrM_distMean[,"distSd"] -- the matrix from
# the distMean-outlier-filtered data -- so the displayed correlations did not
# match the p values computed above; read from corrM_distSd instead.
distSd <- corrM_distSd[,"distSd"]
sdCorrDf <- as.data.frame(distSd) %>%
round(.,2) %>%
mutate(measures = row.names(.)) %>%
select(measures, everything()) %>%
bind_cols(p_value) %>%
mutate(
distSd = cell_spec(distSd, "html", color = ifelse(p < 0.05, "darkorange", "black"))) %>%
filter(!measures %in% c("distMean", "distSd")) %>%
select(-p)
Correlations between distance and other subject level measures after excluding the outliers
# render the combined (distMean, distSd) correlation table
left_join(meanCorrDf, sdCorrDf, by = "measures") %>%
kable(format = "html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
| measures | distMean | distSd |
|---|---|---|
| list_goal | 0.11 | 0.2 |
| claim_goal | 0.12 | 0.14 |
| Importance | -0.23 | -0.19 |
| Ought | -0.22 | -0.23 |
| Measurability | -0.14 | -0.1 |
| Commonality | -0.3 | -0.33 |
| Attainability | -0.04 | -0.05 |
| Extraversion_mean | -0.08 | -0.06 |
| Agreeableness_mean | -0.07 | -0.05 |
| Conscientiousness_mean | 0.05 | -0.01 |
| Neuroticism_mean | 0.01 | 0.04 |
| OpenMindedness_mean | -0.03 | -0.03 |
| BSCS_mean | 0.03 | 0.08 |
| GOS_learning | 0.12 | 0.15 |
| GOS_avoidance | -0.02 | -0.03 |
| GOS_prove | -0.09 | -0.12 |
| GSE_mean | -0.07 | -0.04 |
| LET_mean | -0.04 | -0.06 |
| PS_mean | 0.04 | -0.02 |
| RSE_mean | 0.01 | 0.03 |
| SWL_mean | 0 | 0 |
| family_mean | -0.05 | -0.09 |
| competetion_mean | 0.11 | 0.09 |
| appearance_mean | -0.01 | 0.01 |
| god_mean | -0.17 | -0.1 |
| academic_mean | -0.02 | 0 |
| virtue_mean | -0.05 | -0.02 |
| approval_mean | 0.04 | 0.02 |
Comparing people who were able to list all their goals vs. those who had to choose 5 or fewer out of all their goals.
Separate subjects into 3 groups (list = claimed; list < claimed; list > claimed)
# extract ID numbers
# NOTE(review): diffNum appears to be claimed minus listed goals (> 0 means
# the subject claimed more goals than they listed) -- confirm upstream
id_claimMore <- unique(goalDf_sum_wide$id[goalDf_sum_wide$diffNum > 0])
id_same <- unique(goalDf_sum_wide$id[goalDf_sum_wide$diffNum == 0])
id_listMore <- unique(goalDf_sum_wide$id[goalDf_sum_wide$diffNum < 0])
# assign group
distDf_perSub_M <- distDf_perSub_M %>%
mutate(group = case_when(
subject_id %in% id_claimMore ~ "claimMore",
subject_id %in% id_same ~ "same",
subject_id %in% id_listMore ~ "listMore"
))
Compare the average and variance of distance across 3 groups
# group-level summary: subject count and group means of distMean / distSd
distDf_perSub_M %>%
group_by(group) %>%
summarise(
count = n(),
distMean = mean(distMean, na.rm = TRUE),
distSd = mean(distSd, na.rm = TRUE)
) %>%
kable(format = "html", escape = F, caption = "Group mean of the average distance and distance variance") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
| group | count | distMean | distSd |
|---|---|---|---|
| claimMore | 56 | 8.510348 | 4.779654 |
| listMore | 24 | 7.080206 | 4.105739 |
| same | 128 | 7.788799 | 4.164625 |
# violin plot of the per-subject mean distance by listing group
# (diamond marks the group mean)
ggplot(distDf_perSub_M, aes(group, distMean)) +
  geom_violin() +
  stat_summary(fun = mean, geom = "point", shape = 23, size = 2) +
  ggtitle("distMean")
# same plot for the per-subject distance sd
ggplot(distDf_perSub_M, aes(group, distSd)) +
  geom_violin() +
  stat_summary(fun = mean, geom = "point", shape = 23, size = 2) +
  ggtitle("distSd")
## Warning: Removed 15 rows containing non-finite values (stat_ydensity).
## Warning: Removed 15 rows containing non-finite values (stat_summary).
The average distance doesn’t differ across groups
# one-way ANOVA: does the average distance differ across listing groups?
res.aov <- aov(distMean ~ group, data = distDf_perSub_M)
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 39 19.29 1.164 0.314
## Residuals 205 3397 16.57
The variance in distance doesn’t differ across groups either
# one-way ANOVA: does the distance variance differ across listing groups?
res.aov <- aov(distSd ~ group, data = distDf_perSub_M)
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 15.4 7.722 1.106 0.333
## Residuals 190 1326.0 6.979
## 15 observations deleted due to missingness
Similar to the results using all subjects, the subject-level relationships are weak. Planfulness is positively correlated with average distance.
# rebuild the subject-level data frame, restricted to the "same" group
# (subjects who listed exactly as many goals as they claimed)
subjectDf <- left_join(indivDiffDf, distDf_perSub_M, by = c("id" = "subject_id"))
subjectDf <- subjectDf_factor %>%
select(id, list_goal, claim_goal) %>%
left_join(subjectDf, by = "id") %>%
filter(id %in% id_same) %>%
select(-id,-group)
# generate a correlational matrix
corrM_im <- cor(subjectDf,use = "pairwise")
res1 <- cor.mtest(subjectDf, conf.level = .95)
# rows c(24,25) = distMean/distSd, cols 1:23 = the individual measures
# (position-based indexing -- TODO confirm against the column layout)
p_value <- as.data.frame(t(res1$p[c(24,25),1:23]))
# generate and present results in a table
as.data.frame(t(corrM_im[c(24,25),1:23])) %>%
round(.,2) %>%
mutate(measures = row.names(.)) %>%
select(measures, everything()) %>%
bind_cols(p_value) %>%
mutate(
distMean = cell_spec(distMean, "html", color = ifelse(V1 < 0.05, "darkorange", "black")),
distSd = cell_spec(distSd, "html", color = ifelse(V2 < 0.05, "darkorange", "black"))
) %>%
select(-V1,-V2) %>%
kable(format = "html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
| measures | distMean | distSd |
|---|---|---|
| list_goal | 0.1 | 0.25 |
| claim_goal | 0.1 | 0.25 |
| Extraversion_mean | 0.03 | -0.03 |
| Agreeableness_mean | -0.01 | 0.04 |
| Conscientiousness_mean | 0.1 | -0.01 |
| Neuroticism_mean | 0.06 | 0.09 |
| OpenMindedness_mean | 0.02 | 0 |
| BSCS_mean | 0.11 | 0.15 |
| GOS_learning | 0.18 | 0.18 |
| GOS_avoidance | -0.03 | -0.03 |
| GOS_prove | 0.04 | -0.02 |
| GSE_mean | -0.01 | -0.01 |
| LET_mean | 0.07 | 0 |
| PS_mean | 0.3 | 0.21 |
| RSE_mean | 0.08 | 0.07 |
| SWL_mean | 0.01 | 0.03 |
| family_mean | 0.04 | -0.02 |
| competetion_mean | 0.15 | 0.14 |
| appearance_mean | 0.01 | 0.09 |
| god_mean | -0.16 | -0.12 |
| academic_mean | -0.06 | 0.02 |
| virtue_mean | -0.05 | 0.04 |
| approval_mean | 0.01 | -0.03 |
Factor scores are not different across groups. The range in the "same" group extends to the lower end for some factors.
# assign each subject to a listing group (claimMore / same / listMore)
subjectDf_factor <- subjectDf_factor %>%
  mutate(group = case_when(
    id %in% id_claimMore ~ "claimMore",
    id %in% id_same ~ "same",
    id %in% id_listMore ~ "listMore"
  ))
# group means of the five factor scores
subjectDf_factor %>%
  group_by(group) %>%
  summarise(
    Importance = mean(Importance, na.rm = TRUE),
    Ought = mean(Ought, na.rm = TRUE),
    Measurability = mean(Measurability, na.rm = TRUE),
    Commonality = mean(Commonality, na.rm = TRUE),
    # FIX: removed the trailing comma here, which passed an empty
    # argument to summarise()
    Attainability = mean(Attainability, na.rm = TRUE)
  ) %>%
  kable(format = "html", escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
| group | Importance | Ought | Measurability | Commonality | Attainability |
|---|---|---|---|---|---|
| claimMore | 5.323194 | 3.992634 | 5.330238 | 5.58378 | 5.802827 |
| listMore | 5.350852 | 4.436285 | 5.322222 | 5.61250 | 5.788889 |
| same | 5.246713 | 4.037500 | 5.383624 | 5.70562 | 5.839277 |
# reshape to long format across the five factors for faceted violin plots
# NOTE(review): gather() is superseded; pivot_longer() is the modern equivalent
subjectDf_factor %>%
gather(Importance: Attainability, key = "factor", value = "factorScore") %>%
ggplot(aes(group, factorScore)) +
geom_violin() +
stat_summary(fun=mean, geom="point", shape=23, size=2) +
facet_wrap(~factor, nrow = 4)
# compute subject-level mean and sd in connectedness, instrumentality and all
# goal pursuit variables (one row per id x variable; the result stays grouped
# by id after summarise drops the last grouping level)
interDf_long <- goalRating_long_R %>%
filter(variable %in% c("connectedness", "instrumentality", "advancement", "urgency", "effort", "commitment", "initial_time_R")) %>%
group_by(id, variable) %>%
summarise(Mean = mean(rating, na.rm = T),
Sd = sd(rating, na.rm = T))
correlation between the means
# correlation between the subject-level means: widen to one column per
# variable, drop id, and correlate
interDf_corr_mean <- interDf_long %>%
select(-Sd) %>%
spread(variable, Mean) %>%
ungroup() %>%
select(-id) %>%
cor(use = "pairwise.complete.obs")
# visualization
corrplot(interDf_corr_mean, method = "circle",number.cex = .7, order = "AOE", addCoef.col = "black",type = "upper",col= colorRampPalette(c("midnightblue","white", "orange"))(200))